First we have to prepare the data.

# read the articles CSV from disk
PP_NYT <- read.csv(
  file = "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Data/PP_NYT.csv"
)

# keep only the columns used in the analysis, as a separate working data frame
pp <- PP_NYT[, c("DATE", "TITLE", "LENGTH", "GRAPHIC", "SECTION", "BYLINE",
                 "DATELINE", "TEXT")]
names(pp)
## [1] "DATE"     "TITLE"    "LENGTH"   "GRAPHIC"  "SECTION"  "BYLINE"  
## [7] "DATELINE" "TEXT"
# create a new date variable that represents the article date as a Date
# (printed as YYYY-MM-DD). The input is expected to look like
# "<month name> <day>, <year>"; note that "%B" matches month names in the
# current locale, so parsing can fail under a non-English locale.
date <- as.character(pp$DATE)
betterDates <- as.Date(date, format = "%B %d, %Y")
if (anyNA(betterDates)) {
  # unparsed dates become NA and are silently dropped by later year filters,
  # so make the loss visible
  warning(sum(is.na(betterDates)), " article date(s) could not be parsed; ",
          "their year and yearmonth will be NA", call. = FALSE)
}
pp$date.num <- betterDates

# create a variable for just the year each article was published
# (a character string such as "2010")
pp$year <- format(pp$date.num, "%Y")

# create a variable for the year and month each article was published
# (a character string such as "2010-01")
pp$yearmonth <- format(pp$date.num, "%Y-%m")

Topic Modeling Analysis

Now we’re going to look at the topics discussed in the articles on Planned Parenthood, and at how those topics change over time. Though we’re applying this analysis to Planned Parenthood articles, we could really use it on any corpus, to analyze how the topics of a set of documents have changed over a given variable—be it time, from document to document, by person, and so on.

# NOTE(review): set.seed() seeds R's random number generator; whether it also
# controls MALLET's Java sampler is not visible from this file -- confirm
# before relying on it for reproducible topics
set.seed(1234)

# load the libraries we will need for this section
library(mallet) # a wrapper around the Java machine learning tool MALLET
## Loading required package: rJava
library(wordcloud) # to visualize wordclouds
## Loading required package: RColorBrewer
# subset the data to articles published after 2009 (i.e. 2010 onward), for an
# initial analysis of the topic models. `year` is stored as a character string,
# so convert it explicitly before the numeric comparison; rows whose year is NA
# are dropped by subset()
pp.2010 <- subset(pp, as.integer(pp$year) > 2009)

# we first have to create an 'id' column
pp.2010$id <- rownames(pp.2010)

# remove punctuation (each punctuation character is replaced by a space)
pp.2010$TEXT <- gsub(pattern = "[[:punct:]]", replacement = " ", pp.2010$TEXT)


# load data into mallet: document ids, document text, the stop word file,
# case folding (preserve.case = FALSE) and the token pattern
mallet.instances <- mallet.import(
  pp.2010$id,
  pp.2010$TEXT,
  "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Data/stoplist.csv",
  preserve.case = FALSE,
  token.regexp = "[\\p{L}']+"
)

# how many topics the model should find
n.topics <- 10

# build the topic trainer and hand it the imported documents
topic.model <- MalletLDA(n.topics)
topic.model$loadDocuments(mallet.instances)

# pull the vocabulary and per-word frequency statistics; after a first run
# through this code, the stop word list was re-curated to remove some of the
# more frequently used words that weren't otherwise caught
vocabulary <- topic.model$getVocabulary()
word.freqs <- mallet.word.freqs(topic.model)

# look at the first 50 vocabulary entries
word.freqs[seq_len(50), ]
##             words term.freq doc.freq
## 1          weight         3        2
## 2       political      1019      478
## 3        universe        15       13
## 4       shoulders        13       13
## 5      candidates       662      253
## 6         running       315      203
## 7          senate       984      315
## 8            seat       138       69
## 9            long       638      407
## 10           held       354      270
## 11         edward        37       36
## 12        kennedy       117       58
## 13       embarked         8        8
## 14       frenzied         3        3
## 15            day       668      380
## 16    campaigning        43       36
## 17         monday       301      188
## 18         groups       550      302
## 19          sides       138      111
## 20         health      2165      678
## 21           care      1324      563
## 22         debate       938      353
## 23        flooded        10        8
## 24          state      2436      621
## 25          money       831      370
## 26 advertisements        30       26
## 27         ground       161      115
## 28         troops        46       28
## 29      influence        64       55
## 30        outcome        48       43
## 31       frenetic         3        3
## 32            end       501      342
## 33           race       470      241
## 34     originally        26       25
## 35        thought       217      161
## 36       cakewalk         2        2
## 37         martha        27       20
## 38        coakley        24       10
## 39     democratic       729      311
## 40       attorney       144       95
## 41        general       359      223
## 42  massachusetts       226      112
## 43 overwhelmingly        25       22
## 44          polls       210      126
## 45         showed       163      132
## 46          scott       137      103
## 47          brown       175       69
## 48     republican      2627      632
## 49        senator       808      331
## 50         closed       101       78
# rank the vocabulary from highest to lowest term frequency and show the top
word.freqs.ordered <- word.freqs[with(word.freqs, order(-term.freq)), ]
head(word.freqs.ordered)
##          words term.freq doc.freq
## 304   abortion      4148      640
## 661      women      3961      709
## 282    planned      3268     1285
## 283 parenthood      3173     1276
## 48  republican      2627      632
## 24       state      2436      621
# optimize hyperparameters every 20 iterations, after 50 burn-in iterations
topic.model$setAlphaOptimization(20, 50)

# now train a model, specifying the number of iterations
topic.model$train(100)

# get the probability of topics in documents and the probability of words in
# topics; by default the functions return word counts, so to get the
# probabilities we can normalize and add smoothing, in order to ensure that
# nothing has a probability of exactly 0.
# doc.topics has one row per document and one column per topic;
# topic.words has one row per topic and one column per vocabulary word.
doc.topics <- mallet.doc.topics(topic.model, smoothed = TRUE, normalized = TRUE)
topic.words <- mallet.topic.words(topic.model, smoothed = TRUE, normalized = TRUE)

# what are the top words in topic 4?
mallet.top.words(topic.model, topic.words[4, ])
##        words     weights
## 1     street 0.010344397
## 2       city 0.010193410
## 3     church 0.008457059
## 4   brooklyn 0.007664378
## 5        art 0.005588307
## 6     public 0.004795625
## 7       john 0.004493651
## 8      local 0.004267171
## 9     avenue 0.004191677
## 10 president 0.003927450
# build a short name for every topic: its five highest-weighted words,
# separated by spaces
topics.labels <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)

# have a look at keywords for each topic
topics.labels
##  [1] "abortion women state law court"           
##  [2] "life people religious make good"          
##  [3] "house republican republicans senate bill" 
##  [4] "street city church brooklyn art"          
##  [5] "people man day told gun"                  
##  [6] "republican women campaign romney party"   
##  [7] "planned parenthood health services care"  
##  [8] "family planned school years parenthood"   
##  [9] "obama president romney federal government"
## [10] "women sex education school young"
# show the first few document titles with more than .25 of their content
# devoted to topic 1. doc.topics has one row per document and one column per
# topic, so topic 1's share of every document is the first *column*; indexing
# the first row instead yields a 10-element mask that R silently recycles over
# the titles. (Any knitted output printed below predates this fix.)
head(pp.2010$TITLE[doc.topics[, 1] > 0.25], 10)
##  [1] After Long Decline, Teenage Pregnancy Rate Rises                       
##  [2] To Court Blacks, Foes of Abortion Make Racial Case                     
##  [3] New Spending for a Wider Range of Sex Education                        
##  [4] Paid Notice: Deaths SLOAN, LISA                                        
##  [5] The New Abortion Providers                                             
##  [6] Planned Parenthood Clinics Are Stripped of Affiliation After Complaints
##  [7] A Hidden Minefield at Pregnancy Centers                                
##  [8] Reproductive Choices Women Face                                        
##  [9] Planned Parenthood Fires Employee After Video                          
## [10] Women and Abortion                                                     
## 3791 Levels: 'CONSCIENCE' OF CONSERVATIVES GOES ON THE ATTACK ...
# create a vector that has the title of the most representative text for each
# topic: for topic k, take column k of doc.topics (that topic's share of every
# document) and pick the document where it is largest
topics.articles <- vapply(
  seq_len(n.topics),
  function(k) paste(pp.2010$TITLE[which.max(doc.topics[, k])]),
  character(1)
)

# earlier versions indexed doc.topics by row, which looked up the dominant
# *topic number* of document k and used it as a row number -- that is why many
# topics appeared to share the same article. (Any knitted output printed below
# predates this fix.)
topics.articles
##  [1] "After Long Decline, Teenage Pregnancy Rate Rises"                     
##  [2] "The Candidates, and Supporters From All Over, Push to the Finish Line"
##  [3] "From High Jinks to Handcuffs"                                         
##  [4] "From High Jinks to Handcuffs"                                         
##  [5] "From High Jinks to Handcuffs"                                         
##  [6] "As Lender, Giannoulias Impacted Bank Woes"                            
##  [7] "Ruth P. Smith, 102; Abortion-Rights Pioneer"                          
##  [8] "Ruth P. Smith, 102; Abortion-Rights Pioneer"                          
##  [9] "Ruth P. Smith, 102; Abortion-Rights Pioneer"                          
## [10] "From High Jinks to Handcuffs"
# now let's look at how topics differ across different years: re-estimate the
# topic-word weights of the trained model from only the articles published in
# each year (smoothed and normalized, like topic.words).
# pp.2010 only contains articles published after 2009, so the mask
# pp.2010$year == 2009 is FALSE for every row; that empty, unused subset is no
# longer computed here.
topic.words.2010 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2010, smoothed = TRUE, normalized = TRUE
)
topic.words.2011 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2011, smoothed = TRUE, normalized = TRUE
)
topic.words.2012 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2012, smoothed = TRUE, normalized = TRUE
)
topic.words.2013 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2013, smoothed = TRUE, normalized = TRUE
)
topic.words.2014 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2014, smoothed = TRUE, normalized = TRUE
)
topic.words.2015 <- mallet.subset.topic.words(
  topic.model, pp.2010$year == 2015, smoothed = TRUE, normalized = TRUE
)


# five-word label for each topic, using the word weights from 2010 articles only
topics.labels.2010 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2010[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2010
##  [1] "abortion women abortions doctors clinics"        
##  [2] "life people make control nietzsche"              
##  [3] "republican house senate senator cruz"            
##  [4] "street art museum brooklyn city"                 
##  [5] "keefe wetmore people black solondz"              
##  [6] "campaign political brown conservative democratic"
##  [7] "planned parenthood health organization services" 
##  [8] "family godfrey planned years fellowship"         
##  [9] "bank president military giannoulias money"       
## [10] "sex education abstinence university institute"
# five-word label for each topic, using the word weights from 2011 articles only
topics.labels.2011 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2011[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2011
##  [1] "abortion women state law abortions"         
##  [2] "people life vernacchio make good"           
##  [3] "house republicans republican democrats bill"
##  [4] "street weiner city east tanton"             
##  [5] "keefe man people told asked"                
##  [6] "republican party voters political campaign" 
##  [7] "planned parenthood health services money"   
##  [8] "school planned law parenthood husband"      
##  [9] "budget obama cuts federal president"        
## [10] "sex women education school sexual"
# five-word label for each topic, using the word weights from 2012 articles only
topics.labels.2012 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2012[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2012
##  [1] "abortion women state law court"                
##  [2] "religious life catholic people control"        
##  [3] "house republican republicans senate bill"      
##  [4] "street city brinker foundation vaughn"         
##  [5] "people told day man room"                      
##  [6] "romney women republican campaign voters"       
##  [7] "planned parenthood komen health cancer"        
##  [8] "children school kimbrough family university"   
##  [9] "romney obama president administration santorum"
## [10] "women sex students education young"
# five-word label for each topic, using the word weights from 2013 articles only
topics.labels.2013 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2013[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2013
##  [1] "abortion women state abortions texas"         
##  [2] "people life time religious gay"               
##  [3] "bill senate republican house republicans"     
##  [4] "church brooklyn queens city thompson"         
##  [5] "people day home told play"                    
##  [6] "republican party campaign voters women"       
##  [7] "parenthood health planned cancer breast"      
##  [8] "planned family mother parenthood died"        
##  [9] "obama president administration government tax"
## [10] "women gilbert sex percent education"
# five-word label for each topic, using the word weights from 2014 articles only
topics.labels.2014 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2014[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2014
##  [1] "abortion women court law state"              
##  [2] "religious life book good time"               
##  [3] "republican house christie republicans senate"
##  [4] "sage city art dance ned"                     
##  [5] "dunham people woman day man"                 
##  [6] "women republican voters democratic election" 
##  [7] "planned parenthood health services care"     
##  [8] "years planned ny husband children"           
##  [9] "insurance obama care pay president"          
## [10] "women sex school data found"
# five-word label for each topic, using the word weights from 2015 articles only
topics.labels.2015 <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words.2015[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
topics.labels.2015
##  [1] "abortion women law court state"              
##  [2] "people life make good talk"                  
##  [3] "republican trump republicans house senator"  
##  [4] "deace street art church city"                
##  [5] "people gun shooting police colorado"         
##  [6] "campaign fiorina republican party candidates"
##  [7] "planned parenthood tissue health videos"     
##  [8] "parenthood planned children years family"    
##  [9] "obama president federal people tax"          
## [10] "women sex school university writer"
# turn each year's labels into a plain vector
t.2010 <- as.vector(topics.labels.2010)
t.2011 <- as.vector(topics.labels.2011)
t.2012 <- as.vector(topics.labels.2012)
t.2013 <- as.vector(topics.labels.2013)
t.2014 <- as.vector(topics.labels.2014)
t.2015 <- as.vector(topics.labels.2015)

# place the yearly label vectors side by side to see how the topics change
# over the years: one row per topic, one column per year
topics.over.time <- cbind(
  t.2010 = t.2010,
  t.2011 = t.2011,
  t.2012 = t.2012,
  t.2013 = t.2013,
  t.2014 = t.2014,
  t.2015 = t.2015
)

# inspect one topic at a time -- here the first topic across the years
topics.over.time[1, ]
##                                     t.2010 
## "abortion women abortions doctors clinics" 
##                                     t.2011 
##       "abortion women state law abortions" 
##                                     t.2012 
##           "abortion women state law court" 
##                                     t.2013 
##     "abortion women state abortions texas" 
##                                     t.2014 
##           "abortion women court law state" 
##                                     t.2015 
##           "abortion women law court state"
# the second topic across the years (row 2; one label per yearly column)
topics.over.time[2, ]
##                                   t.2010 
##     "life people make control nietzsche" 
##                                   t.2011 
##       "people life vernacchio make good" 
##                                   t.2012 
## "religious life catholic people control" 
##                                   t.2013 
##         "people life time religious gay" 
##                                   t.2014 
##          "religious life book good time" 
##                                   t.2015 
##             "people life make good talk"
# the third topic across the years (row 3; one label per yearly column)
topics.over.time[3, ]
##                                         t.2010 
##         "republican house senate senator cruz" 
##                                         t.2011 
##  "house republicans republican democrats bill" 
##                                         t.2012 
##     "house republican republicans senate bill" 
##                                         t.2013 
##     "bill senate republican house republicans" 
##                                         t.2014 
## "republican house christie republicans senate" 
##                                         t.2015 
##   "republican trump republicans house senator"
# the fourth topic across the years (row 4; one label per yearly column)
topics.over.time[4, ]
##                                  t.2010 
##       "street art museum brooklyn city" 
##                                  t.2011 
##        "street weiner city east tanton" 
##                                  t.2012 
## "street city brinker foundation vaughn" 
##                                  t.2013 
##  "church brooklyn queens city thompson" 
##                                  t.2014 
##               "sage city art dance ned" 
##                                  t.2015 
##          "deace street art church city"
# the fifth topic across the years (row 5; one label per yearly column)
topics.over.time[5, ]
##                                t.2010 
##  "keefe wetmore people black solondz" 
##                                t.2011 
##         "keefe man people told asked" 
##                                t.2012 
##            "people told day man room" 
##                                t.2013 
##           "people day home told play" 
##                                t.2014 
##         "dunham people woman day man" 
##                                t.2015 
## "people gun shooting police colorado"
# the sixth topic across the years (row 6; one label per yearly column)
topics.over.time[6, ]
##                                             t.2010 
## "campaign political brown conservative democratic" 
##                                             t.2011 
##       "republican party voters political campaign" 
##                                             t.2012 
##          "romney women republican campaign voters" 
##                                             t.2013 
##           "republican party campaign voters women" 
##                                             t.2014 
##      "women republican voters democratic election" 
##                                             t.2015 
##     "campaign fiorina republican party candidates"
# the seventh topic across the years (row 7; one label per yearly column)
topics.over.time[7, ]
##                                            t.2010 
## "planned parenthood health organization services" 
##                                            t.2011 
##        "planned parenthood health services money" 
##                                            t.2012 
##          "planned parenthood komen health cancer" 
##                                            t.2013 
##         "parenthood health planned cancer breast" 
##                                            t.2014 
##         "planned parenthood health services care" 
##                                            t.2015 
##         "planned parenthood tissue health videos"
# the eighth topic across the years (row 8; one label per yearly column)
topics.over.time[8, ]
##                                        t.2010 
##     "family godfrey planned years fellowship" 
##                                        t.2011 
##       "school planned law parenthood husband" 
##                                        t.2012 
## "children school kimbrough family university" 
##                                        t.2013 
##       "planned family mother parenthood died" 
##                                        t.2014 
##           "years planned ny husband children" 
##                                        t.2015 
##    "parenthood planned children years family"
# the ninth topic across the years (row 9; one label per yearly column)
topics.over.time[9, ]
##                                           t.2010 
##      "bank president military giannoulias money" 
##                                           t.2011 
##            "budget obama cuts federal president" 
##                                           t.2012 
## "romney obama president administration santorum" 
##                                           t.2013 
##  "obama president administration government tax" 
##                                           t.2014 
##             "insurance obama care pay president" 
##                                           t.2015 
##             "obama president federal people tax"
# the tenth topic across the years (row 10; one label per yearly column)
topics.over.time[10, ]
##                                          t.2010 
## "sex education abstinence university institute" 
##                                          t.2011 
##             "sex women education school sexual" 
##                                          t.2012 
##            "women sex students education young" 
##                                          t.2013 
##           "women gilbert sex percent education" 
##                                          t.2014 
##                   "women sex school data found" 
##                                          t.2015 
##            "women sex school university writer"

We can represent this relationship visually, as follows:

# with the wordcloud package: draw the highest-weighted words of a single
# topic. topic.num and num.top.words select the topic and the number of words,
# so changing them here is enough to redraw a different cloud.
topic.num <- 1
num.top.words <- 100
topic.top.words <- mallet.top.words(topic.model, topic.words[topic.num, ], num.top.words)
wordcloud(
  topic.top.words$words,
  topic.top.words$weights,
  scale = c(4, .8),
  rot.per = 0,
  random.order = FALSE,
  colors = "red",
  ordered.colors = TRUE
)

# then draw a smaller cloud (top 25 words) for every topic in the model;
# num.topics follows n.topics so the loop cannot drift out of step with the
# number of topics the model was trained with
num.topics <- n.topics
num.top.words <- 25
for (i in seq_len(num.topics)) {
  topic.top.words <- mallet.top.words(topic.model, topic.words[i, ], num.top.words)
  wordcloud(
    topic.top.words$words,
    topic.top.words$weights,
    scale = c(4, .8),
    rot.per = 0,
    random.order = FALSE,
    colors = "red",
    ordered.colors = TRUE
  )
}

We can also create a cluster dendrogram.

# adapted from http://www.cs.princeton.edu/~mimno/R/clustertrees.R

# flip doc.topics so that topics are rows, then scale every row to sum to 1
topic.docs <- t(doc.topics)
topic.docs <- sweep(topic.docs, 1, rowSums(topic.docs), "/")
write.csv(
  topic.docs,
  file = "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Results/topic-docs.csv"
)

# build a short name for every topic: its five highest-weighted words,
# separated by spaces
topics.labels <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)
# have a look at keywords for each topic
topics.labels
##  [1] "abortion women state law court"           
##  [2] "life people religious make good"          
##  [3] "house republican republicans senate bill" 
##  [4] "street city church brooklyn art"          
##  [5] "people man day told gun"                  
##  [6] "republican women campaign romney party"   
##  [7] "planned parenthood health services care"  
##  [8] "family planned school years parenthood"   
##  [9] "obama president romney federal government"
## [10] "women sex education school young"
write.csv(
  topics.labels,
  file = "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Results/topic-labels.csv"
)

# data frame version of topic.docs (rows are topics, columns are documents),
# with each column named after its document id
topic_docs <- setNames(data.frame(topic.docs), pp.2010$id)

# hierarchical clustering of the topics by the distance between their word
# weights, drawn as a tree labelled with the short topic names
plot(
  hclust(dist(topic.words)),
  labels = topics.labels
)

Now we can repeat this analysis on a larger subset of the data, covering the articles published after 1982 up to the present.

# NOTE(review): set.seed() seeds R's random number generator; whether it also
# controls MALLET's Java sampler is not visible from this file -- confirm
# before relying on it for reproducible topics
set.seed(12345)

# load the libraries we will need for this section
library(mallet) # a wrapper around the Java machine learning tool MALLET
library(wordcloud) # to visualize wordclouds

# subset the data to articles published after 1982 (i.e. 1983 onward), the
# dates for which we have the complete data. `year` is stored as a character
# string, so convert it explicitly before the numeric comparison; rows whose
# year is NA are dropped by subset()
pp.1982 <- subset(pp, as.integer(pp$year) > 1982)

# we first have to create an 'id' column
pp.1982$id <- rownames(pp.1982)

# remove punctuation (each punctuation character is replaced by a space)
pp.1982$TEXT <- gsub(pattern = "[[:punct:]]", replacement = " ", pp.1982$TEXT)

# load data into mallet: document ids, document text, the stop word file,
# case folding (preserve.case = FALSE) and the token pattern
mallet.instances <- mallet.import(
  pp.1982$id,
  pp.1982$TEXT,
  "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Data/stoplist.csv",
  preserve.case = FALSE,
  token.regexp = "[\\p{L}']+"
)

# how many topics the model should find
n.topics <- 10

# build the topic trainer and hand it the imported documents
topic.model <- MalletLDA(n.topics)
topic.model$loadDocuments(mallet.instances)

# pull the vocabulary and per-word frequency statistics; these may be useful
# in further curating the stopword list
vocabulary <- topic.model$getVocabulary()
word.freqs <- mallet.word.freqs(topic.model)

# look at the first 50 vocabulary entries
word.freqs[seq_len(50), ]
##             words term.freq doc.freq
## 1         richard        76       63
## 2       schweiker        26        8
## 3       secretary       511      334
## 4          health      5600     1696
## 5           human      1064      603
## 6        services      2391     1105
## 7           today      2099      948
## 8     recommended       111       99
## 9            rule       707      329
## 10      requiring       367      264
## 11         family      3902     1504
## 12       planning      1954      763
## 13        clinics      2772      844
## 14      supported       481      391
## 15        federal      3839     1300
## 16          money      2250      932
## 17         notify       118       90
## 18        parents      1291      529
## 19         minors       151       99
## 20        receive       458      363
## 21          birth      2013      808
## 22        control      2153      968
## 23          pills       560      174
## 24     diaphragms        35       27
## 25   intrauterine        71       53
## 26        devices       155      102
## 27        planned      7738     3773
## 28     parenthood      7559     3807
## 29     federation      1041      803
## 30        america      1607     1096
## 31       threaten        45       45
## 32           teen      1160      295
## 33          agers       592      195
## 34       families       607      376
## 35    immediately       256      233
## 36          moved       476      371
## 37       district       948      515
## 38          court      7542     1201
## 39          block       433      299
## 40 implementation        22       17
## 41        grounds       194      163
## 42       violated       146      118
## 43       statutes        80       53
## 44   constitution       550      232
## 45      guarantee       125      102
## 46       invasion        26       24
## 47        privacy       412      228
## 48       approved       554      338
## 49         office      1472      813
## 50     management       261      189
# rank the vocabulary from highest to lowest term frequency and show the top
word.freqs.ordered <- word.freqs[with(word.freqs, order(-term.freq)), ]
head(word.freqs.ordered)
##          words term.freq doc.freq
## 98    abortion     16465     1952
## 149      women     10390     2042
## 27     planned      7738     3773
## 28  parenthood      7559     3807
## 38       court      7542     1201
## 560      state      6152     1642
# optimize hyperparameters every 20 iterations, after 50 burn-in iterations.
topic.model$setAlphaOptimization(20, 50)

# now train a model. Note that hyperparameter optimization is on, by default.
# We can specify the number of iterations; here we use 100.
topic.model$train(100)

# get the probability of topics in documents and the probability of words in
# topics; by default the functions return word counts, so to get the
# probabilities we can normalize and add smoothing, in order to ensure that
# nothing has a probability of exactly 0.
# doc.topics has one row per document and one column per topic;
# topic.words has one row per topic and one column per vocabulary word.
doc.topics <- mallet.doc.topics(topic.model, smoothed = TRUE, normalized = TRUE)
topic.words <- mallet.topic.words(topic.model, smoothed = TRUE, normalized = TRUE)

# what are the top words in topic 6?
mallet.top.words(topic.model, topic.words[6, ])
##         words     weights
## 1     planned 0.011511540
## 2  parenthood 0.010029485
## 3  university 0.009953698
## 4       years 0.007747456
## 5      school 0.007419046
## 6     college 0.007048532
## 7      family 0.006922221
## 8        died 0.006610652
## 9       board 0.005945411
## 10  president 0.005886466
# build a short name for every topic: its five highest-weighted words,
# separated by spaces
topics.labels <- vapply(
  seq_len(n.topics),
  function(k) {
    top <- mallet.top.words(topic.model, topic.words[k, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  },
  character(1)
)

# have a look at keywords for each topic
topics.labels
##  [1] "life people church public religious"         
##  [2] "street tickets center avenue art"            
##  [3] "people time year years day"                  
##  [4] "million health administration money united"  
##  [5] "republican president house obama republicans"
##  [6] "planned parenthood university years school"  
##  [7] "court law justice abortion supreme"          
##  [8] "abortion abortions planned parenthood women" 
##  [9] "health family planned sex school"            
## [10] "women birth drug control percent"
# show the first few document titles with more than .25 of their content
# devoted to topic 1. doc.topics has one row per document and one column per
# topic, so topic 1's share of every document is the first *column*; indexing
# the first row instead yields a 10-element mask that R silently recycles over
# the titles. (Any knitted output printed below predates this fix.)
head(pp.1982$TITLE[doc.topics[, 1] > 0.25], 10)
##  [1] U.S. TO REQUIRE NOTICE TO PARENTS IF CHILDREN RECEIVE CONTRACEPTIVES                          
##  [2] STATE SENATE PASSES NEW FINANCIAL-DISCLOSURE BILL                                             
##  [3] JOHN P. BLAIR, FOUNDER OF COMPANY WITH VARIED INTERESTS, DIES AT 83                           
##  [4] TOWN HOUSES IN HARLEM ATTRACTING BUYERS                                                       
##  [5] WESTCHESTER GUIDE                                                                             
##  [6] THE U.S. SHOULD FUND NEITHER LEFT NOR RIGHT                                                   
##  [7] ART VIEW; A COLLECTION THAT BREATHES THE SPIRIT OF MODERNISM                                  
##  [8] THE ORIGIN OF A PLAY                                                                          
##  [9] AROUND THE NATION ; 1982 U.S. Abortion Total Shows Small Decline By United Press International
## [10] POPULATION GROWTH: HOW U.S. POLICY EVOLVED                                                    
## 3791 Levels: 'CONSCIENCE' OF CONSERVATIVES GOES ON THE ATTACK ...
# create a vector that has the title of the most representative text for each
# topic: for topic k, take column k of doc.topics (that topic's share of every
# document) and pick the document where it is largest
topics.articles <- vapply(
  seq_len(n.topics),
  function(k) paste(pp.1982$TITLE[which.max(doc.topics[, k])]),
  character(1)
)

# earlier versions indexed doc.topics by row, which looked up the dominant
# *topic number* of document k and used it as a row number -- that is why many
# topics appeared to share the same article. (Any knitted output printed below
# predates this fix.)
topics.articles
##  [1] "U.S. TO REQUIRE NOTICE TO PARENTS IF CHILDREN RECEIVE CONTRACEPTIVES"
##  [2] "BIRTH-CONTROL RULE: CLINICS PONDER EFFECTS"                          
##  [3] "WOMEN SEEK ABORTION LOANS"                                           
##  [4] "WOMEN SEEK ABORTION LOANS"                                           
##  [5] "U.S. TO REQUIRE NOTICE TO PARENTS IF CHILDREN RECEIVE CONTRACEPTIVES"
##  [6] "A BACKDOOR ASSAULT ON FAMILY PLANNING"                               
##  [7] "A LEGISLATIVE BATTLE IN PENNSYLVANIA"                                
##  [8] "A BACKDOOR ASSAULT ON FAMILY PLANNING"                               
##  [9] "A LEGISLATIVE BATTLE IN PENNSYLVANIA"                                
## [10] "U.S. TO REQUIRE NOTICE TO PARENTS IF CHILDREN RECEIVE CONTRACEPTIVES"
# Now let's look at how the topics differ across years.
#
# For every publication year we (1) ask MALLET for the topic-word weights
# restricted to that year's articles, (2) label each topic with its five
# highest-weighted words, and (3) bind the per-year label vectors into one
# matrix: one row per topic, one column per year (t.1983 ... t.2015).
years <- 1983:2015

# topic-word weights computed from the articles of a single year only
# (pp.1982$year is compared against the year; rows of pp.1982 are assumed to
# line up with the documents loaded into topic.model -- TODO confirm)
topic.words.by.year <- lapply(years, function(yr) {
  mallet.subset.topic.words(topic.model, pp.1982$year == yr,
                            smoothed = TRUE, normalized = TRUE)
})
names(topic.words.by.year) <- paste0("topic.words.", years)

# collapse the top five words of every topic into one space-separated label
label.topics <- function(topic.words) {
  vapply(seq_len(n.topics), function(topic) {
    top <- mallet.top.words(topic.model, topic.words[topic, ], num.top.words = 5)
    paste(top$words, collapse = " ")
  }, character(1))
}

topics.labels.by.year <- lapply(topic.words.by.year, label.topics)
names(topics.labels.by.year) <- paste0("t.", years)

# create a matrix with all the topics over time
topics.over.time <- do.call(cbind, topics.labels.by.year)

# Keep the per-year objects available under their original names
# (topic.words.YYYY, topics.labels.YYYY, t.YYYY) for any later chunk that
# still refers to them. invisible() stops list2env() from printing the
# environment it returns.
invisible(list2env(topic.words.by.year, envir = environment()))
invisible(list2env(setNames(topics.labels.by.year, paste0("topics.labels.", years)),
                   envir = environment()))
invisible(list2env(topics.labels.by.year, envir = environment()))

Now we can look at how the topics have changed over the years, to see if Planned Parenthood has become a more politicized issue over time, or perhaps during certain election cycles.

# look at each topic individually: row k of topics.over.time holds the five
# top words of topic k for every year (columns t.1983 ... t.2015)
# -- the first topic over the years
topics.over.time[1, ]
##                                      t.1983 
##           "life human role movement people" 
##                                      t.1984 
##    "church life catholic ferraro political" 
##                                      t.1985 
##          "church people bishop life public" 
##                                      t.1986 
##      "catholic church people life oriented" 
##                                      t.1987 
##        "life people world church religious" 
##                                      t.1988 
##    "church churches black bishop robertson" 
##                                      t.1989 
##      "life people fulghum wattleton church" 
##                                      t.1990 
##       "schroeder people life church issues" 
##                                      t.1991 
##       "church people catholic members life" 
##                                      t.1992 
##         "people ireland life public rights" 
##                                      t.1993 
##           "louise mero suicide life church" 
##                                      t.1994 
##         "people life black suicide vatican" 
##                                      t.1995 
##     "catholic church people life political" 
##                                      t.1996 
##          "gay life religious people church" 
##                                      t.1997 
##    "suicide assisted life people political" 
##                                      t.1998 
##              "ross book life people bishop" 
##                                      t.1999 
##        "life movement book people personal" 
##                                      t.2000 
##          "gore church people catholic life" 
##                                      t.2001 
##  "pedreira religious people baptist public" 
##                                      t.2002 
##           "people life ideas social ethnic" 
##                                      t.2003 
##            "people sex life moral personal" 
##                                      t.2004 
##        "church american public life people" 
##                                      t.2005 
##     "people life church conservative faith" 
##                                      t.2006 
##              "life war church power people" 
##                                      t.2007 
##    "life liberal people political marriage" 
##                                      t.2008 
##         "religious book people church life" 
##                                      t.2009 
##   "life people conservative church liberal" 
##                                      t.2010 
## "black wetmore life nietzsche conservative" 
##                                      t.2011 
##          "life social public people church" 
##                                      t.2012 
##     "religious people life catholic social" 
##                                      t.2013 
##           "gay life marriage public people" 
##                                      t.2014 
##         "dunham book life social religious" 
##                                      t.2015 
##           "life people pope public opinion"
# the second topic over the years
topics.over.time[2, ]
##                                       t.1983 
##             "harlem street city shop avenue" 
##                                       t.1984 
##              "center art sale arts tremaine" 
##                                       t.1985 
##          "papp street theater festival park" 
##                                       t.1986 
##             "street thrift avenue shop east" 
##                                       t.1987 
##         "street east manhattan artists city" 
##                                       t.1988 
## "graffiti city street assemblyman manhattan" 
##                                       t.1989 
##      "street tickets manhattan park theater" 
##                                       t.1990 
##         "museum steichen street river house" 
##                                       t.1991 
##       "street tickets benefit dinner avenue" 
##                                       t.1992 
##       "tickets street benefit party fashion" 
##                                       t.1993 
##          "tickets street rock benefit music" 
##                                       t.1994 
##       "tickets street benefit dinner avenue" 
##                                       t.1995 
##          "street tickets kitchen dinner art" 
##                                       t.1996 
##      "kassindja patriarch hall miss kpalime" 
##                                       t.1997 
##              "center art museum street city" 
##                                       t.1998 
##           "tickets street hours avenue july" 
##                                       t.1999 
##         "tickets benefit street june dinner" 
##                                       t.2000 
##           "island raven fund barbash harbor" 
##                                       t.2001 
##            "island street today center film" 
##                                       t.2002 
##          "scheide music street library park" 
##                                       t.2003 
##    "guttenberg film street documentary west" 
##                                       t.2004 
##       "cooke street cooking dinner hamptons" 
##                                       t.2005 
##            "club village city zoning malloy" 
##                                       t.2006 
##          "artists street art east manhattan" 
##                                       t.2007 
##             "street hours road center hyder" 
##                                       t.2008 
##        "fossella stewart hammer hall island" 
##                                       t.2009 
##          "street songs petrusich east music" 
##                                       t.2010 
##               "solondz street art de greene" 
##                                       t.2011 
##         "street weiner east music petrusich" 
##                                       t.2012 
##          "street city music brooklyn center" 
##                                       t.2013 
##    "church brooklyn queens thompson attends" 
##                                       t.2014 
##              "art dance street film goldwyn" 
##                                       t.2015 
##                "street art show west friday"
# the third topic over the years
topics.over.time[3, ]
##                             t.1983                             t.1984 
##        "town year years good time"         "years year day time told" 
##                             t.1985                             t.1986 
##      "bours time people home year"    "time people police bomb years" 
##                             t.1987                             t.1988 
##  "malvasi people fire time police"  "people time year volunteers day" 
##                             t.1989                             t.1990 
##       "people time day years back"       "people day years time year" 
##                             t.1991                             t.1992 
##    "home people police time years"       "people time year years day" 
##                             t.1993                             t.1994 
##     "time people years mother day"      "people year time job office" 
##                             t.1995                             t.1996 
##    "people year police years time"       "people time men year years" 
##                             t.1997                             t.1998 
##        "people year time work men" "time people years mccaughey year" 
##                             t.1999                             t.2000 
##      "time kopp people year years"   "nytimes people year time years" 
##                             t.2001                             t.2002 
##    "anthrax people back kopp work"    "police people nelson day fire" 
##                             t.2003                             t.2004 
##      "people kopp years home time"      "year people time child girl" 
##                             t.2005                             t.2006 
##       "people man time years told"      "time people years year home" 
##                             t.2007                             t.2008 
##       "people told time year home"      "shirt people years year job" 
##                             t.2009                             t.2010 
##  "ivins anthrax case years people"        "keefe time years year day" 
##                             t.2011                             t.2012 
##       "people keefe told man time"       "people told time day years" 
##                             t.2013                             t.2014 
##     "gilbert people time year day"        "people time year work day" 
##                             t.2015 
##  "people time shooting gun police"
# the fourth topic over the years
topics.over.time[4, ]
##                                              t.1983 
##   "federal health reagan administration government" 
##                                              t.1984 
##    "population reagan united policy administration" 
##                                              t.1985 
##   "administration million population united groups" 
##                                              t.1986 
##         "million united company states advertising" 
##                                              t.1987 
##  "administration million federal reagan government" 
##                                              t.1988 
##   "million population united administration reagan" 
##                                              t.1989 
##            "groups million united states president" 
##                                              t.1990 
##        "company million united president companies" 
##                                              t.1991 
##    "million administration government fees federal" 
##                                              t.1992 
##    "united million president states administration" 
##                                              t.1993 
## "administration health president united population" 
##                                              t.1994 
##        "million population health groups insurance" 
##                                              t.1995 
##        "administration president money health plan" 
##                                              t.1996 
##             "million united company american group" 
##                                              t.1997 
##               "money million carey federal council" 
##                                              t.1998 
##           "million company groups fidelity johnson" 
##                                              t.1999 
##            "million company money united companies" 
##                                              t.2000 
##          "million states companies company corzine" 
##                                              t.2001 
##         "groups federal administration money group" 
##                                              t.2002 
##    "administration agency health officials percent" 
##                                              t.2003 
##              "groups million company plan software" 
##                                              t.2004 
##       "administration health federal united agency" 
##                                              t.2005 
##  "agency administration groups president officials" 
##                                              t.2006 
##      "plan million administration president health" 
##                                              t.2007 
##             "million money funds percent president" 
##                                              t.2008 
##    "health million president administration groups" 
##                                              t.2009 
##           "insurance health coverage federal money" 
##                                              t.2010 
##                 "million bank money health federal" 
##                                              t.2011 
##    "federal government health money administration" 
##                                              t.2012 
## "health administration president million insurance" 
##                                              t.2013 
##          "health million administration year money" 
##                                              t.2014 
##            "million insurance health coverage year" 
##                                              t.2015 
##            "government states health money federal"
# the fifth topic over the years
topics.over.time[5, ]
##                                              t.1983 
##         "republican vote president senate congress" 
##                                              t.1984 
##       "president house republican political issues" 
##                                              t.1985 
##       "senator republican campaign president house" 
##                                              t.1986 
##             "state senate campaign house committee" 
##                                              t.1987 
##        "republican house president issues campaign" 
##                                              t.1988 
##           "bush state president republican senator" 
##                                              t.1989 
##     "president governor republican political state" 
##                                              t.1990 
##              "bill senate governor president house" 
##                                              t.1991 
##         "house republican vote representative bush" 
##                                              t.1992 
##          "bush republican president campaign state" 
##                                              t.1993 
##        "clinton president campaign bill republican" 
##                                              t.1994 
##        "house clinton republican president senator" 
##                                              t.1995 
##      "house bill republican nomination republicans" 
##                                              t.1996 
##        "republican campaign clinton president bill" 
##                                              t.1997 
##           "clinton president house republican vote" 
##                                              t.1998 
##             "bill republican campaign pataki house" 
##                                              t.1999 
##             "governor bill republican bush clinton" 
##                                              t.2000 
##     "bush campaign republican president democratic" 
##                                              t.2001 
##     "mcgreevey bush schundler republican president" 
##                                              t.2002 
##         "forrester bill republican democrats house" 
##                                              t.2003 
##             "senate republican bush bill president" 
##                                              t.2004 
##             "bush president house democratic party" 
##                                              t.2005 
##   "senator republican democrats santorum president" 
##                                              t.2006 
##          "republican president senator senate bush" 
##                                              t.2007 
##           "republican bill obama campaign giuliani" 
##                                              t.2008 
##          "obama republican party senator political" 
##                                              t.2009 
##                 "obama house senate president bill" 
##                                              t.2010 
##     "republican campaign president senator mcmahon" 
##                                              t.2011 
##      "house republican republicans democrats obama" 
##                                              t.2012 
##        "romney obama republican president campaign" 
##                                              t.2013 
##              "republican state party campaign bill" 
##                                              t.2014 
## "republican state republicans democrats democratic" 
##                                              t.2015 
##          "republican republicans trump house obama"
# the sixth topic over the years
topics.over.time[6, ]
##                                              t.1983 
##        "university chairman planned school college" 
##                                              t.1984 
## "cousins university rockefeller planned parenthood" 
##                                              t.1985 
##        "university planned college school director" 
##                                              t.1986 
##         "college canfield university planned years" 
##                                              t.1987 
##         "president planned years university school" 
##                                              t.1988 
##         "planned president hepburn university city" 
##                                              t.1989 
##        "planned university school years parenthood" 
##                                              t.1990 
##     "university planned menninger years parenthood" 
##                                              t.1991 
##        "university college president years planned" 
##                                              t.1992 
##       "university president college planned dallas" 
##                                              t.1993 
##      "daughter university planned president father" 
##                                              t.1994 
##           "college school years planned university" 
##                                              t.1995 
##      "planned university college parenthood father" 
##                                              t.1996 
##    "husband university planned daughter parenthood" 
##                                              t.1997 
##            "planned parenthood family board norman" 
##                                              t.1998 
##       "university planned parenthood college board" 
##                                              t.1999 
##        "planned parenthood university died nytimes" 
##                                              t.2000 
##       "dyson planned university parenthood nytimes" 
##                                              t.2001 
##         "planned parenthood family board president" 
##                                              t.2002 
##          "board planned parenthood university wife" 
##                                              t.2003 
##                "leon levy planned parenthood board" 
##                                              t.2004 
##                 "parenthood planned ny died school" 
##                                              t.2005 
##                 "board planned rabbi school family" 
##                                              t.2006 
##             "died school university years children" 
##                                              t.2007 
##       "university planned years parenthood college" 
##                                              t.2008 
##              "mott planned parenthood years esther" 
##                                              t.2009 
##           "years school family university director" 
##                                              t.2010 
##         "university family planned parenthood life" 
##                                              t.2011 
##             "planned parenthood school law husband" 
##                                              t.2012 
##      "planned parenthood university school college" 
##                                              t.2013 
##           "planned parenthood family college years" 
##                                              t.2014 
##                  "planned sage ny parenthood years" 
##                                              t.2015 
##        "planned parenthood university years family"
# the seventh topic over the years
topics.over.time[7, ]
##                                  t.1983 
##    "court judge decision rule abortion" 
##                                  t.1984 
##      "court law abortion washburn case" 
##                                  t.1985 
##      "court state supreme law decision" 
##                                  t.1986 
##     "court abortion state roe decision" 
##                                  t.1987 
##          "court law judge bork supreme" 
##                                  t.1988 
## "court abortion federal judge decision" 
##                                  t.1989 
##      "court abortion roe state supreme" 
##                                  t.1990 
##   "court judge souter abortion supreme" 
##                                  t.1991 
##       "court law abortion supreme case" 
##                                  t.1992 
##       "court justice abortion law case" 
##                                  t.1993 
##    "court law abortion justice supreme" 
##                                  t.1994 
##        "court souter law justice judge" 
##                                  t.1995 
##       "court law case abortion justice" 
##                                  t.1996 
##      "court rehnquist law justice case" 
##                                  t.1997 
##      "court state supreme decision law" 
##                                  t.1998 
##      "court law judge supreme abortion" 
##                                  t.1999 
##       "court justice law blackmun case" 
##                                  t.2000 
##     "court abortion law nebraska state" 
##                                  t.2001 
##    "court connor law ashcroft abortion" 
##                                  t.2002 
##          "court case supreme law judge" 
##                                  t.2003 
##     "court law bowers justice decision" 
##                                  t.2004 
##   "justice court law blackmun abortion" 
##                                  t.2005 
##      "court judge justice law abortion" 
##                                  t.2006 
##         "court justice judge law alito" 
##                                  t.2007 
##    "court stevens justice law abortion" 
##                                  t.2008 
##         "law court judge justice state" 
##                                  t.2009 
##      "court justice judge law abortion" 
##                                  t.2010 
##         "court law supreme case thomas" 
##                                  t.2011 
##      "law court state federal abortion" 
##                                  t.2012 
##          "court law justice state case" 
##                                  t.2013 
##       "court law supreme justice state" 
##                                  t.2014 
##       "court law supreme state justice" 
##                                  t.2015 
##        "court law case supreme justice"
# the eighth topic: row 8 of topics.over.time, printed as one string of
# top words per year (elements are named t.<year>)
topics.over.time[8, ]
##                                            t.1983 
##      "abortion planned abortions parenthood life" 
##                                            t.1984 
##        "abortion abortions planned clinics women" 
##                                            t.1985 
##       "abortion clinic clinics abortions planned" 
##                                            t.1986 
##     "abortion abortions women parenthood planned" 
##                                            t.1987 
##   "abortion abortions planned parenthood clinics" 
##                                            t.1988 
##     "abortion abortions women planned parenthood" 
##                                            t.1989 
##         "abortion abortions rights women planned" 
##                                            t.1990 
##     "abortion abortions planned parenthood women" 
##                                            t.1991 
##    "abortion clinics abortions parenthood clinic" 
##                                            t.1992 
##         "abortion abortions women clinics rights" 
##                                            t.1993 
##        "abortion abortions clinic clinics rights" 
##                                            t.1994 
##      "abortion clinic parenthood planned clinics" 
##                                            t.1995 
##    "abortion clinic abortions clinics parenthood" 
##                                            t.1996 
##     "abortion abortions parenthood planned salvi" 
##                                            t.1997 
##    "abortion abortions parenthood planned rights" 
##                                            t.1998 
##          "abortion abortions clinic anti doctors" 
##                                            t.1999 
##        "abortion abortions doctors rights clinic" 
##                                            t.2000 
## "abortion abortions mifepristone doctors planned" 
##                                            t.2001 
##     "abortion clinics planned parenthood federal" 
##                                            t.2002 
##    "abortion planned parenthood clinic abortions" 
##                                            t.2003 
##    "abortion abortions rights planned parenthood" 
##                                            t.2004 
##      "abortion records planned rights parenthood" 
##                                            t.2005 
##    "abortion planned parenthood abortions rights" 
##                                            t.2006 
##    "abortion abortions planned parenthood rights" 
##                                            t.2007 
##     "abortion abortions women parenthood planned" 
##                                            t.2008 
##     "abortion grand planned parenthood abortions" 
##                                            t.2009 
##          "abortion tiller abortions clinic women" 
##                                            t.2010 
##   "abortion abortions planned parenthood doctors" 
##                                            t.2011 
##     "abortion abortions planned parenthood state" 
##                                            t.2012 
##     "abortion parenthood planned abortions women" 
##                                            t.2013 
##          "abortion abortions women texas clinics" 
##                                            t.2014 
##         "abortion clinics women abortions clinic" 
##                                            t.2015 
##        "abortion parenthood planned tissue fetal"
# the ninth topic: row 9 of topics.over.time, printed as one string of
# top words per year (elements are named t.<year>)
topics.over.time[9, ]
##                                       t.1983 
##     "family parents health planned planning" 
##                                       t.1984 
##   "family planning county programs services" 
##                                       t.1985 
##      "family teen planning program services" 
##                                       t.1986 
##         "teen agers school education family" 
##                                       t.1987 
##        "family teen planned school planning" 
##                                       t.1988 
##    "family planning health services program" 
##                                       t.1989 
##              "teen parents aids year health" 
##                                       t.1990 
##            "aids teen health family planned" 
##                                       t.1991 
##        "family planning health program teen" 
##                                       t.1992 
##        "family aids health services planned" 
##                                       t.1993 
##            "school teen program care family" 
##                                       t.1994 
##          "school teen planned family health" 
##                                       t.1995 
##        "health care foster services planned" 
##                                       t.1996 
##              "aids teen family sex children" 
##                                       t.1997 
##    "family care planning planned parenthood" 
##                                       t.1998 
##  "family health services calderone planning" 
##                                       t.1999 
##        "health care school condom education" 
##                                       t.2000 
##        "sex parents health school education" 
##                                       t.2001 
## "family planned programs education services" 
##                                       t.2002 
##           "health services family care city" 
##                                       t.2003 
##            "sex sexual health school family" 
##                                       t.2004 
##       "sex education alberto jasmine health" 
##                                       t.2005 
##    "sex family parents education abstinence" 
##                                       t.2006 
##           "family sex sexual parents health" 
##                                       t.2007 
##    "sex health education planned abstinence" 
##                                       t.2008 
##             "health care school age planned" 
##                                       t.2009 
##           "health family sex care education" 
##                                       t.2010 
##     "education sex health abstinence family" 
##                                       t.2011 
##      "sex health services family parenthood" 
##                                       t.2012 
##      "health planned care parenthood family" 
##                                       t.2013 
##     "health family care parenthood planning" 
##                                       t.2014 
##             "health care planned sex family" 
##                                       t.2015 
##  "health planned parenthood family services"
# the tenth topic: row 10 of topics.over.time, printed as one string of
# top words per year (elements are named t.<year>)
topics.over.time[10, ]
##                                        t.1983 
##       "women drug sponge contraceptive birth" 
##                                        t.1984 
##              "women birth infant study sperm" 
##                                        t.1985 
##             "women control birth percent men" 
##                                        t.1986 
##             "women birth percent genetic iud" 
##                                        t.1987 
##           "women birth control percent woman" 
##                                        t.1988 
##                "women drug control iud birth" 
##                                        t.1989 
##               "women percent woman drug pill" 
##                                        t.1990 
##               "women drug pill birth control" 
##                                        t.1991 
##          "women norplant birth woman medical" 
##                                        t.1992 
##         "women drug pregnancy norplant birth" 
##                                        t.1993 
##             "women pill control doctor birth" 
##                                        t.1994 
##               "women ru drug percent control" 
##                                        t.1995 
##           "women birth control percent woman" 
##                                        t.1996 
##              "women birth men percent cancer" 
##                                        t.1997 
##    "women pregnancy contraception drug birth" 
##                                        t.1998 
##           "women birth percent woman control" 
##                                        t.1999 
##          "women drug doctors research sponge" 
##                                        t.2000 
##             "women drug pill birth pregnancy" 
##                                        t.2001 
##         "women safe baby birth contraceptive" 
##                                        t.2002 
##          "women cancer pill research control" 
##                                        t.2003 
##              "women drug birth pills control" 
##                                        t.2004 
##             "women drug birth pregnancy pill" 
##                                        t.2005 
##               "women drug pill birth morning" 
##                                        t.2006 
##         "women drug pill birth contraception" 
##                                        t.2007 
##          "women birth research percent woman" 
##                                        t.2008 
##             "women study control risk lifers" 
##                                        t.2009 
##            "women pills study drug dominican" 
##                                        t.2010 
## "women birth pregnancy control contraception" 
##                                        t.2011 
##          "women birth control percent tanton" 
##                                        t.2012 
##      "women komen contraception birth cancer" 
##                                        t.2013 
##            "women cancer breast percent drug" 
##                                        t.2014 
##          "women woman gomperts birth control" 
##                                        t.2015 
##        "women research control woman percent"

The ninth topic is especially interesting—it appears to track scandals or politicized issues that Planned Parenthood is embroiled in. (Note: in the output printed above, the ninth topic is the family-planning and sex-education topic, so the words quoted below presumably come from a different run of the model in which the scandal topic was numbered ninth.) To take a peek at how that has changed over the years, we can see that in 1985, the most common words in the topic were “bours public office called investigation.” In 1993, they were “death suicide public told office.” In 1999, they were “kopp smith web death site.” In 2006, they were “death kline group found called.” In 2012, they were “told video case web kimbrough.” And in 2015, they were “tissue fetal video planned people.” We can also trace changes in the topic that’s about the Supreme Court, in the topic that’s about elections, and in the topic that’s about sex education—these prove to be very informative topics from which we can build interesting additional research questions!

We can also represent these topics visually, as follows:

# with the wordcloud package: draw a word cloud for a single topic
topic.num <- 1        # which topic (row of topic.words) to plot
num.top.words <- 100  # how many of that topic's top words to include
# use the two settings above instead of repeating the literals 1 and 100,
# so that changing them actually changes the plot
topic.top.words <- mallet.top.words(topic.model, topic.words[topic.num, ],
                                    num.top.words)
# scale gives the range of word sizes; rot.per = 0 keeps every word
# horizontal; random.order = FALSE plots words in decreasing weight
wordcloud(topic.top.words$words, topic.top.words$weights, scale = c(4, .8),
          rot.per = 0, random.order = FALSE, colors = "red",
          ordered.colors = TRUE)

# settings for the per-topic word clouds
num.topics <- 10     # number of topics (rows of topic.words) to plot
num.top.words <- 25  # how many top words to show in each cloud

# draw one word cloud per topic; seq_len() yields an empty sequence (rather
# than c(1, 0)) if num.topics is ever 0
for (i in seq_len(num.topics)) {
  topic.top.words <- mallet.top.words(topic.model, topic.words[i, ],
                                      num.top.words)
  wordcloud(topic.top.words$words, topic.top.words$weights, scale = c(4, .8),
            rot.per = 0, random.order = FALSE, colors = "red",
            ordered.colors = TRUE)
}

We can also create a cluster dendrogram.

# from http://www.cs.princeton.edu/~mimno/R/clustertrees.R

# transpose the doc topics so that rows are topics and columns are documents,
# then divide each row by its sum so every topic's row sums to 1
topic.docs <- t(doc.topics)
topic.docs <- topic.docs / rowSums(topic.docs)
write.csv(topic.docs, "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Results/topic-docs2.csv")

# Get a vector containing short names for the topics: each label is the
# topic's five top words joined by spaces
topics.labels <- rep("", n.topics)
# seq_len() avoids iterating over c(1, 0) if n.topics is ever 0
for (topic in seq_len(n.topics)) {
  topics.labels[topic] <- paste(
    mallet.top.words(topic.model, topic.words[topic, ],
                     num.top.words = 5)$words,
    collapse = " "
  )
}
# have a look at keywords for each topic
topics.labels
##  [1] "life people church public religious"         
##  [2] "street tickets center avenue art"            
##  [3] "people time year years day"                  
##  [4] "million health administration money united"  
##  [5] "republican president house obama republicans"
##  [6] "planned parenthood university years school"  
##  [7] "court law justice abortion supreme"          
##  [8] "abortion abortions planned parenthood women" 
##  [9] "health family planned sex school"            
## [10] "women birth drug control percent"
# save the topic labels to disk
write.csv(topics.labels, "/Users/elizabeth/Documents/Berkeley/PS239T/ps239T-final-project/Results/topic-labels2.csv")

# build a data frame with topics as rows and documents as columns, using
# pp.1982$id as the column names
topic_docs <- setNames(data.frame(topic.docs), pp.1982$id)

# hierarchically cluster the topics on their rows of topic.words and save
# the resulting dendrogram, labelled with the topic labels, as a PNG
png("Graphic_PPDendogram.png")
plot(hclust(dist(topic.words)), labels = topics.labels)
dev.off()
## quartz_off_screen 
##                 2